# Copyright 2025 ZTE Corporation.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

import os

from typing import List, Optional

import pandas as pd


def extract_excel_content(document_path: str):
    result = extract_excel_content(document_path)
    return result


def _convert_to_markdown(df: pd.DataFrame) -> str:
    r"""Convert DataFrame to Markdown format table.

    Args:
        df (pd.DataFrame): DataFrame containing the Excel data.

    Returns:
        str: Markdown formatted table.
    """
    from tabulate import tabulate

    md_table = tabulate(df, headers='keys', tablefmt='pipe')
    return str(md_table)


def extract_excel_content(document_path: str) -> str:
    r"""Extract detailed cell information from an Excel file, including
    multiple sheets.

    Args:
        document_path (str): The path of the Excel file.

    Returns:
        str: Extracted excel information, including details of each sheet.
    """
    from openpyxl import load_workbook
    from xls2xlsx import XLS2XLSX

    print(
        f"Calling extract_excel_content with document_path"
        f": {document_path}"
    )

    if not (
            document_path.endswith("xls")
            or document_path.endswith("xlsx")
            or document_path.endswith("csv")
    ):
        print("Only xls, xlsx, csv files are supported.")
        return (
            f"Failed to process file {document_path}: "
            f"It is not excel format. Please try other ways."
        )

    if document_path.endswith("csv"):
        try:
            df = pd.read_csv(document_path)
            md_table = _convert_to_markdown(df)
            return f"CSV File Processed:\n{md_table}"
        except Exception as e:
            print(f"Failed to process file {document_path}: {e}")
            return f"Failed to process file {document_path}: {e}"

    if document_path.endswith("xls"):
        output_path = document_path.replace(".xls", ".xlsx")
        x2x = XLS2XLSX(document_path)
        x2x.to_xlsx(output_path)
        document_path = output_path

    # Load the Excel workbook
    wb = load_workbook(document_path, data_only=True)
    sheet_info_list = []

    # Iterate through all sheets
    for sheet in wb.sheetnames:
        ws = wb[sheet]
        cell_info_list = []

        for row in ws.iter_rows():
            for cell in row:
                row_num = cell.row
                col_letter = cell.column_letter

                cell_value = cell.value

                font_color = None
                if (
                        cell.font
                        and cell.font.color
                        and "rgb=None" not in str(cell.font.color)
                ):  # Handle font color
                    font_color = cell.font.color.rgb

                fill_color = None
                if (
                        cell.fill
                        and cell.fill.fgColor
                        and "rgb=None" not in str(cell.fill.fgColor)
                ):  # Handle fill color
                    fill_color = cell.fill.fgColor.rgb

                cell_info_list.append(
                    {
                        "index": f"{row_num}{col_letter}",
                        "value": cell_value,
                        "font_color": font_color,
                        "fill_color": fill_color,
                    }
                )

        # Convert the sheet to a DataFrame and then to markdown
        sheet_df = pd.read_excel(
            document_path, sheet_name=sheet, engine='openpyxl'
        )
        markdown_content = _convert_to_markdown(sheet_df)

        # Collect all information for the sheet
        sheet_info = {
            "sheet_name": sheet,
            "cell_info_list": cell_info_list,
            "markdown_content": markdown_content,
        }
        sheet_info_list.append(sheet_info)

    result_str = ""
    for sheet_info in sheet_info_list:
        result_str += f"""
            Sheet Name: {sheet_info['sheet_name']}
            Cell information list:
            {sheet_info['cell_info_list']}

            Markdown View of the content:
            {sheet_info['markdown_content']}

            {'-' * 40}
            """

    return result_str



